"""
准备词典
"""
from tqdm import tqdm

from chatbot.chatbot_core.word_sequence import WordSequence
import chatbot.config as config
import pickle


def save_dnnsort_ws(by_word=True):
    """Build the vocabulary for the sort model and pickle it to disk.

    Every line of the question corpus is stripped, split on whitespace and
    fed to a ``WordSequence`` via ``fit``. The vocabulary is then built with
    ``min_count=5``, its length is printed, and the ``WordSequence`` object
    is pickled to the configured save path.

    Args:
        by_word: When true, the ``*_by_word`` paths from ``config`` are used
            for both the inputs and the output; otherwise the plain
            variants are used.

    Raises:
        OSError: If either input file cannot be opened or the output file
            cannot be written.
    """
    q_path = config.sort_q_by_word_path if by_word else config.sort_q_path
    sim_q_path = config.sort_sim_q_by_word_path if by_word else config.sort_sim_q_path

    ws = WordSequence()
    # NOTE(review): the similar-question file is opened but never read, so
    # only the question corpus contributes to the vocabulary. The open is
    # kept so a missing file still fails fast exactly as before -- confirm
    # whether its lines were meant to be fitted as well.
    with open(q_path, encoding="utf-8") as q_file, open(sim_q_path, encoding="utf-8"):
        # readlines() materializes the file so tqdm can display a total.
        for line in tqdm(q_file.readlines()):
            ws.fit(line.strip().split())

    ws.build_vocab(min_count=5)
    print(len(ws))

    save_path = config.sort_ws_by_word_save_path if by_word else config.sort_ws_save_path
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(save_path, "wb") as out_file:
        pickle.dump(ws, out_file)
